import  requests
from  lxml import etree
from lxml.html import tostring
from pyquery import PyQuery as PQ

def get_page(url, timeout=10):
    """Fetch *url* with a browser-like User-Agent and return the response.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without it a stalled server would
            block the call forever.

    Returns:
        The ``requests.Response`` with its encoding forced to UTF-8 when the
        status code is 200, otherwise ``None``.

    Raises:
        requests.RequestException: Propagated unchanged from ``requests.get``
            (connection failure, timeout, ...).
    """
    headers = {
        # Well-formed desktop Chrome User-Agent (the previous value had typos:
        # "MOzilla", "Inter Mac OS X" and missing separators).
        'User-Agent': (
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/65.0.3325.162 Safari/537.36'
        ),
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        return None
    # Force UTF-8 so that response.text is decoded as UTF-8 regardless of the
    # charset requests would otherwise guess from the headers.
    response.encoding = "utf-8"
    return response

# with open(f'spider_url.txt',"r+",encoding="utf-8") as f:
#     line_nu = []
#     for line in f:  # load the data from disk into memory one line at a time
#         if not 0:  # meant to read only the first five lines (as written, the condition is always true)
#             line_nu.append(line.strip())
#             print(line_nu)


# for i in  line_nu:
#     r = requests.get(i)
#     selector = etree.HTML(r.text)
#     print(selector)

# Fetch the hard-coded WeChat article page; get_page() returns None when the
# server answers with anything other than HTTP 200.
r = get_page('https://mp.weixin.qq.com/s?__biz=MzAxMjUyNDQ5OA==&mid=2653557633&idx=2&sn=dddd44db35a2dd04ecb0945cdd2574d2&chksm=806e3d3cb719b42af24afbd848061dbfa9eb3990f65fa30ba09ab157fd0e228bcb815b1cc55d#rd')
if r is None:
    # Stop with a clear message instead of failing on `None.text` below.
    raise SystemExit("failed to fetch the article page (non-200 response)")
# lxml tree of the page; only the commented-out XPath extraction refers to it.
selector = etree.HTML(r.text)
# arctle_title = selector.xpath('//h2[@id="activity-name"]/text()')[0]  # title
# print(arctle_title)
# arctle_auther = selector.xpath('//span[@id="js_author_name"]/text()')[0]       # article author
# print(arctle_auther)
# arctle_community = selector.xpath('//a[@id="js_name"]/text()')[0]    # community the article belongs to
# print(arctle_community)
# article_content = selector.xpath('//*[@id="js_content"]')  # article content
# article_content1 = tostring(article_content[0])
# article_content2 = str(article_content1,'utf8')
# with open("content.html","w+",encoding="utf8") as f:
#     f.write(article_content2)
# print(article_content2)

# Parse the fetched HTML with PyQuery and collect the article fields by
# CSS selector into `data`, then print the body text.
doc = PQ(r.text)
data = {}
# Title and account name have every space character removed.
data["title"] = doc('#activity-name').text().replace(" ", "")
# NOTE(review): the commented-out XPath code treats #js_name as the community
# and #js_author_name as the author — confirm which one "author" should hold.
data["author"] = doc('#js_name').text().replace(" ", "")
# Publish time and paragraph text are stored exactly as PyQuery returns them.
data["date"] = doc('#publish_time').text()
data["content"] = doc('#js_content > p').text()
print(data["content"])


# with open(f'1.txt', "w+", encoding="utf-8") as f:
#     for i in arctle_content2:
#         f.write(i)
#         f.close()